We superimposed the figures (the set of landmarks for each fish) using a two-step procedure. First, we used a two-point registration (Bookstein, xxx), with LM-01 and LM-06 to align each fish in an orientation that make the relative coordinate values functionally interpretable. Second, we use a generalized resistant fit (GRF) to re-superimpose the figures for subsequent analysis. For the GRF, we excluded LM-13 as the relative position of the tip of the caudal fin is strongly determined by preservation artifacts. We used the length of the caudal fin, measured as the distance between LM-06 and LM-13, as the x-coordinate for LM-13 in all subsequent analysis. We use a value of zero for the value of the y-coordinate for LM-13 in figures but not in any statistical analysis.
Both Flow and Predator treatments had effects on shape and size. To measure effects of flow on guppy body shape and size, we fit the regression model M1: \(site \times flow\) to each shape coordinate, where \(site\) is the sampling locality of the wildtype grandparent fish and \(flow\) is limited to the “No Flow” and “Flow” treatments (the “Flow + predators” treatment is excluded). Model M1 is size-free but not allometry-free, that is some of the variation in each shape coordinate is associated with size. Consequently, we also fit the regression model M2: \(site \times flow + size\), where \(size\) is the median size of each fish computed from the GRF.
geom_ancova_grouped <- function(m1, group_by = "pred"){
geom_smooth(method = "lm",
mapping = aes(y = predict(m1),
linetype = get(group_by))
)
}
data_folder <- "data/ghalambor"
file_name <- "allff.raw.txt"
file_path <- here(data_folder, file_name)
raw_lm <- fread(file_path) %>%
clean_names()
raw_lm[, caudal_fin := sqrt((c_fx-x6)^2 + (cfy-y6)^2)]
raw_lm[, caudal_fin2 := c_fx-x6]
# temp <- raw_lm[dataset == "greenhouse",
# .SD,
# .SDcols = c("id", "gen", "treatment", "caudal_fin", "caudal_fin2")]
raw_long <- melt(raw_lm,
id.vars = c("id", "dataset"),
measure.vars = list(paste0("x", 1:12),
paste0("y", 1:12)),
variable.name = c("landmark"),
value.name = c("x", "y")
)
qplot(x = x, y = y, data = raw_long) +
coord_fixed()
axis_levels <- c("x", "y")
lm_levels <- 1:12
lm_cols <- do.call(paste0, expand.grid(axis_levels, lm_levels))
lm_cols_x13 <- c(lm_cols, "x13")
lm_cols_13 <- c(lm_cols_x13, "y13")
lm_mat <- raw_lm[, .SD, .SDcols = lm_cols] %>%
as.matrix()
lm_array <- matrix_2_array(lm_mat)
plot_shapes(lm_array)
lm_tpr <- tpr(lm_array, 1, 6)
plot_shapes(lm_tpr)
lm_grf <- grf(lm_tpr)
plot_shapes(lm_grf)
lm_grf_dt <- array_2_matrix(lm_grf) %>%
data.table
colnames(lm_grf_dt) <- lm_cols
keep_cols <- setdiff(names(raw_lm), lm_cols)
shape <- cbind(raw_lm[, .SD, .SDcols = keep_cols],
lm_grf_dt)
setnames(shape, "treatment", "drainage_treatment")
centroid_size <- function(coords){
cols <- 1:length(coords)
even_col <- cols[lapply(cols, "%%", 2) == 0]
odd_col <- cols[lapply(cols, "%%", 2) != 0]
x <- coords[even_col]
y <- coords[odd_col]
xbar <- mean(x)
ybar <- mean(y)
cs <- sqrt(sum((x-xbar)^2 + (y-ybar)^2))
return(cs)
}
cs <- apply(lm_mat, 1, centroid_size)
shape[, centroid_size := cs]
n <- dim(lm_array)[3]
for(i in 1:n){
fig_i <- lm_array[,,i]
shape[i, median_size := median_size(fig_i)]
}
shape[, x13 := mean(x6) + caudal_fin/median_size]
data_folder <- "data/ghalambor"
file_name <- "stream to drainage map.txt"
file_path <- here(data_folder, file_name)
drainage_map <- fread(file_path) %>%
clean_names()
shape1 <- merge(shape, drainage_map,
by = "stream",
sort = FALSE)
shape <- shape1
shape[, stream5 := substr(stream_id, 1, 5) %>% tolower()]
shape[, site := paste(stream5, tolower(pred_state), sep = "-")]
shape[, stream_id := factor(stream_id)]
# names(shape_wide)
shape_long <- melt(shape,
id.vars = c("id", "dataset", "stream_id",
"stream", "pred_state", "gen",
"drainage_treatment"),
measure.vars = list(paste0("x", 1:13),
paste0("y", 1:12)),
variable.name = c("landmark"),
value.name = c("x", "y")
)
# unique(shape_wide$landmark)
qplot(x = x, y = y, data = shape_long) +
coord_fixed()
# unique(shape$dataset)
shape[dataset == "wild", treatment := "Wild"]
shape[dataset == "garden", treatment := "No Flow"]
shape[dataset == "greenhouse" &
drainage_treatment == "pike", treatment := "Flow + pike"]
shape[dataset == "greenhouse" &
drainage_treatment == "no_pike", treatment := "Flow"]
unique(shape$stream_id)
## [1] "arimaRi" "AripoIn" "AripoDC" "ElCedroMain"
## [5] "ElCedroDavid" "ElCedroIn" "GuanapoMain" "JordanTrib"
## [9] "Marianne" "MarianneTrib" "Mauzica" "Oropuche"
## [13] "Paria" "Quare6" "Quare6_2" "QuareII"
## [17] "TurareHigh" "TurareMed" "turaretriblow" "turaremedpred"
## [21] "yaratrib" "ardc" "arin" "guan"
## [25] "orop" "quII" "turu" "quar"
shape[stream_id == "quII", stream_id := "quar"]
f2 <- shape[gen != "f0"]
# recode treatment
recode <- FALSE
if(recode == TRUE){
f2[, treatment_old := treatment]
f2[gen == "p03", treatment := "Flow + pike"]
f2[gen == "p01", treatment := "Flow"]
}
flow_levels <- c("No Flow", "Flow")
pred_levels <- c("Flow", "Flow + pike")
treatment_levels <- union(flow_levels, pred_levels)
treatment_tank_levels <- c("No Flow f2", "Flow p02", "Flow p03",
"Flow + pike p01", "Flow + pike p04")
# recoded
if(recode == TRUE){
treatment_tank_levels <- c("No Flow f2", "Flow p01", "Flow p02", "Flow + pike p03", "Flow + pike p04")
}
f2[, treatment_tank := factor(paste(treatment, gen),
levels = treatment_tank_levels)]
f2[, treatment := factor(treatment,
levels = treatment_levels)]
stream_id_levels <- c("ardc", "arin", "orop", "quar", "turu", "guan")
pred_state_levels <- c("High", "Med", "Low", "LowIntro" )
f2[, stream_id := factor(stream_id, levels = stream_id_levels)]
f2[, pred_state := factor(pred_state, levels = pred_state_levels)]
# create sample_name
f2[, sample_name := paste("fish", .I)]
# f2[, sample_name := .I]
Y <- f2[, .SD, .SDcols = lm_cols_x13] %>%
as.matrix()
row.names(Y) = f2$sample_name
S <- cov(Y)
E <- eigen(S)$vectors
f2[, pc1 := (Y %*% E[,1])[,1]]
f2[, pc2 := (Y %*% E[,2])[,1]]
f2[, pc3 := (Y %*% E[,3])[,1]]
gg1 <- ggplot(data = f2,
aes(x = pc1,
y = pc2,
color = treatment_tank)) +
geom_point() +
theme_pubr() +
ggtitle("PC1 v PC2: size free") +
scale_color_manual(values = pal_okabe_ito) +
theme(legend.title = element_blank()) +
guides(color = guide_legend(nrow = 2, byrow = TRUE)) +
NULL
gg2 <- ggplot(data = f2,
aes(x = pc2,
y = pc3,
color = treatment_tank)) +
geom_point() +
theme_pubr() +
ggtitle("PC2 v PC3: size free") +
scale_color_manual(values = pal_okabe_ito) +
theme(legend.title = element_blank()) +
guides(color = guide_legend(nrow = 2, byrow = TRUE)) +
NULL
plot_grid(gg1, gg2, nrow = 2)
scatter3d(pc3 ~ pc1 + pc2 | treatment_tank,
data = f2,
surface = FALSE)
# unstandardized
dist_mat <- dist(Y, method = 'euclidean')
hclust_avg <- hclust(dist_mat, method = 'average')
# plot(hclust_avg)
# extract dendrogram segment data
dendrogram_data <- dendro_data(hclust_avg)
dendrogram_segments <- dendrogram_data$segments # contains all dendrogram segment data
# get terminal dendrogram segments
dendrogram_ends <- dendrogram_segments %>%
filter(yend == 0) %>% # filter for terminal dendrogram ends
left_join(dendrogram_data$labels, by = "x") %>% # .$labels contains the row names from dist_matrix (i.e., sample_name)
rename(sample_name = label) %>%
left_join(f2, by = "sample_name")
# dataframe now contains only terminal dendrogram segments and merged metadata associated with each iris
gg_tree <- ggplot() +
geom_segment(data = dendrogram_segments,
aes(x = x,
y = y,
xend = xend,
yend = yend)) +
geom_segment(data = dendrogram_ends,
aes(x = x,
y = y.x,
xend = xend,
yend = yend,
# text = paste("sample name: ",
# sample_name, "<br>",
# "species: ", Species),
color = treatment_tank)) +
# test aes is for plotly
scale_color_manual(values = pal_okabe_ito[1:5]) +
scale_y_reverse() +
coord_flip() +
theme_bw() +
theme(legend.position = "none") +
ylab("Distance") + # flipped x and y coordinates for aesthetic reasons
ggtitle("Treatment clusters -- size free")
gg_tree
Y_raw <- f2[, .SD, .SDcols = lm_cols_x13] %>%
as.matrix()
m1 <- lm(Y ~ median_size, data = f2)
Y <- residuals(m1)
row.names(Y) = f2$sample_name
S <- cov(Y)
E <- eigen(S)$vectors
f2[, pc1_allom_free := (Y %*% E[,1])[,1]]
f2[, pc2_allom_free := (Y %*% E[,2])[,1]]
f2[, pc3_allom_free := (Y %*% E[,3])[,1]]
gg1 <- ggplot(data = f2,
aes(x = pc1_allom_free,
y = pc2_allom_free,
color = treatment_tank)) +
geom_point() +
theme_pubr() +
ggtitle("PC1 v PC2: allometry free") +
scale_color_manual(values = pal_okabe_ito) +
theme(legend.title = element_blank()) +
guides(color = guide_legend(nrow = 2, byrow = TRUE)) +
NULL
gg2 <- ggplot(data = f2,
aes(x = pc2,
y = pc3,
color = treatment_tank)) +
geom_point() +
theme_pubr() +
ggtitle("PC2 v PC3: allometry free") +
scale_color_manual(values = pal_okabe_ito) +
theme(legend.title = element_blank()) +
guides(color = guide_legend(nrow = 2, byrow = TRUE)) +
NULL
gg1
gg2
#plot_grid(gg1, gg2, nrow = 2)
# unstandardized
dist_mat <- dist(Y, method = 'euclidean')
hclust_avg <- hclust(dist_mat, method = 'average')
# plot(hclust_avg)
# extract dendrogram segment data
dendrogram_data <- dendro_data(hclust_avg)
dendrogram_segments <- dendrogram_data$segments # contains all dendrogram segment data
# get terminal dendrogram segments
dendrogram_ends <- dendrogram_segments %>%
filter(yend == 0) %>% # filter for terminal dendrogram ends
left_join(dendrogram_data$labels, by = "x") %>% # .$labels contains the row names from dist_matrix (i.e., sample_name)
rename(sample_name = label) %>%
left_join(f2, by = "sample_name")
# dataframe now contains only terminal dendrogram segments and merged metadata associated with each iris
gg_tree <- ggplot() +
geom_segment(data = dendrogram_segments,
aes(x = x,
y = y,
xend = xend,
yend = yend)) +
geom_segment(data = dendrogram_ends,
aes(x = x,
y = y.x,
xend = xend,
yend = yend,
# text = paste("sample name: ",
# sample_name, "<br>",
# "species: ", Species),
color = treatment_tank)) +
# test aes is for plotly
scale_color_manual(values = pal_okabe_ito[1:5]) +
scale_y_reverse() +
coord_flip() +
theme_bw() +
theme(legend.position = "none") +
ylab("Distance") + # flipped x and y coordinates for aesthetic reasons
ggtitle("Treatment clusters -- unstandardized")
gg_tree
Notes
gg1 <- ggplot(data = f2,
aes(x = median_size,
y = pc1,
color = treatment_tank)) +
geom_point() +
theme_pubr() +
ggtitle("PC1 v median size: size free") +
scale_color_manual(values = pal_okabe_ito) +
theme(legend.title = element_blank()) +
guides(color = guide_legend(nrow = 2, byrow = TRUE)) +
NULL
gg2 <- ggplot(data = f2,
aes(x = median_size,
y = pc1_allom_free,
color = treatment_tank)) +
geom_point() +
theme_pubr() +
ggtitle("PC1 v median size: allometry free") +
scale_color_manual(values = pal_okabe_ito) +
theme(legend.title = element_blank()) +
guides(color = guide_legend(nrow = 2, byrow = TRUE)) +
NULL
gg1
gg2
pd <- position_dodge(0.4)
gg_univariate <- list()
p <- length(lm_cols_x13)
for(j in 1:p){
gg <- ggplot(data = f2,
aes(x = treatment_tank,
y = get(lm_cols_x13[j]),
color = stream_id)) +
geom_point(position = pd) +
ylab(lm_cols_x13[j]) +
scale_color_manual(values = pal_okabe_ito_blue) +
NULL
gg_univariate[[length(gg_univariate)+1]] <- ggplotGrob(gg)
}
names(gg_univariate) <- lm_cols_x13
plot_grid(gg_univariate[[1]],
gg_univariate[[2]], nrow = 2)
plot_grid(gg_univariate[[3]],
gg_univariate[[4]], nrow = 2)
plot_grid(gg_univariate[[5]],
gg_univariate[[6]], nrow = 2)
plot_grid(gg_univariate[[7]],
gg_univariate[[8]], nrow = 2)
plot_grid(gg_univariate[[9]],
gg_univariate[[10]], nrow = 2)
plot_grid(gg_univariate[[11]],
gg_univariate[[12]], nrow = 2)
plot_grid(gg_univariate[[13]],
gg_univariate[[14]], nrow = 2)
plot_grid(gg_univariate[[15]],
gg_univariate[[16]], nrow = 2)
plot_grid(gg_univariate[[17]],
gg_univariate[[18]], nrow = 2)
plot_grid(gg_univariate[[19]],
gg_univariate[[20]], nrow = 2)
plot_grid(gg_univariate[[21]],
gg_univariate[[22]], nrow = 2)
plot_grid(gg_univariate[[23]],
gg_univariate[[24]], nrow = 2)
plot_grid(gg_univariate[[25]])
contrast_matrix <- diag(18)
stream_ids <- paste0(levels(f2$stream_id), "_")
flow_levels <- c("noflow", "flow", "flow+pike")
contrast_labels <- do.call(paste0, expand.grid(stream_ids, flow_levels))
row.names(contrast_matrix) <- contrast_labels
lm_pairs <- list(
"ardc:flow - no flow" = contrast_matrix["ardc_flow",] -
contrast_matrix["ardc_noflow",],
"arin:flow - no flow" = contrast_matrix["arin_flow",] -
contrast_matrix["arin_noflow",],
"orop:flow - no flow" = contrast_matrix["orop_flow",] -
contrast_matrix["orop_noflow",],
"quar:flow - no flow" = contrast_matrix["quar_flow",] -
contrast_matrix["quar_noflow",],
"turu:flow - no flow" = contrast_matrix["turu_flow",] -
contrast_matrix["turu_noflow",],
"guan:flow - no flow" = contrast_matrix["guan_flow",] -
contrast_matrix["guan_noflow",],
"ardc:pred - no pred" = contrast_matrix["ardc_flow+pike",] -
contrast_matrix["ardc_flow",],
"arin:pred - no pred" = contrast_matrix["arin_flow+pike",] -
contrast_matrix["arin_flow",],
"orop:pred - no pred" = contrast_matrix["orop_flow+pike",] -
contrast_matrix["orop_flow",],
"quar:pred - no pred" = contrast_matrix["quar_flow+pike",] -
contrast_matrix["quar_flow",],
"turu:pred - no pred" = contrast_matrix["turu_flow+pike",] -
contrast_matrix["turu_flow",],
"guan:pred - no pred" = contrast_matrix["guan_flow+pike",] -
contrast_matrix["guan_flow",]
)
contrast_matrix <- diag(30)
stream_ids <- paste0(levels(f2$stream_id), "_")
flow_levels <- c("noflow", "flow-p02", "flow-p03", "flow+pike-p01", "flow+pike-p04")
contrast_labels <- do.call(paste0, expand.grid(stream_ids, flow_levels))
row.names(contrast_matrix) <- contrast_labels
lm_pairs <- list(
"ardc:flow - no flow" = contrast_matrix["ardc_flow",] -
contrast_matrix["ardc_noflow",],
"arin:flow - no flow" = contrast_matrix["arin_flow",] -
contrast_matrix["arin_noflow",],
"orop:flow - no flow" = contrast_matrix["orop_flow",] -
contrast_matrix["orop_noflow",],
"quar:flow - no flow" = contrast_matrix["quar_flow",] -
contrast_matrix["quar_noflow",],
"turu:flow - no flow" = contrast_matrix["turu_flow",] -
contrast_matrix["turu_noflow",],
"guan:flow - no flow" = contrast_matrix["guan_flow",] -
contrast_matrix["guan_noflow",],
"ardc:pred - no pred" = contrast_matrix["ardc_flow+pike",] -
contrast_matrix["ardc_flow",],
"arin:pred - no pred" = contrast_matrix["arin_flow+pike",] -
contrast_matrix["arin_flow",],
"orop:pred - no pred" = contrast_matrix["orop_flow+pike",] -
contrast_matrix["orop_flow",],
"quar:pred - no pred" = contrast_matrix["quar_flow+pike",] -
contrast_matrix["quar_flow",],
"turu:pred - no pred" = contrast_matrix["turu_flow+pike",] -
contrast_matrix["turu_flow",],
"guan:pred - no pred" = contrast_matrix["guan_flow+pike",] -
contrast_matrix["guan_flow",]
)
lm_adj <- list()
lm_adj_emm <- list()
lm_adj_pairs <- list()
for(j in 1:length(lm_cols_x13)){
# model fit
form_j <- paste0(lm_cols_x13[j],
" ~ stream_id * treatment_tank + median_size") %>%
formula()
m1 <- lm(form_j, data = f2)
# inference
m1_emm <- emmeans(m1,
specs = c("stream_id", "treatment_tank"))
# m1_pairs <- contrast(m1_emm,
# method = lm_pairs,
# adjust = "none") %>%
# summary(infer = TRUE)
# save to list
lm_adj[[length(lm_adj)+1]] <- m1
lm_adj_emm[[length(lm_adj_emm)+1]] <- m1_emm
# lm_adj_pairs[[length(lm_adj_pairs)+1]] <- m1_pairs
}
names(lm_adj) <- lm_cols_x13
names(lm_adj_emm) <- lm_cols_x13
#names(lm_adj_pairs) <- lm_cols_x13
lm_gg <- list()
for(j in 1:length(lm_cols_x13)){
m1 <- lm_adj[[j]]
m1_emm <- lm_adj_emm[[j]] %>%
summary()
# m1_pairs <- lm_adj_pairs[[j]]
pd <- position_dodge(0.2)
gg <- ggplot(data = m1_emm,
aes(x = stream_id,
y = emmean,
color = treatment_tank)) +
geom_point(position = pd) +
theme_pubr() +
ggtitle(lm_cols_x13[j]) +
scale_color_manual(values = pal_okabe_ito) +
theme(legend.title = element_blank()) +
guides(color = guide_legend(nrow = 2, byrow = TRUE)) +
NULL
lm_gg[[length(lm_gg)+1]] <- gg
}
names(lm_gg) <- lm_cols_x13
plot_grid(lm_gg[[1]],
lm_gg[[2]], ncol = 2)
plot_grid(lm_gg[[3]],
lm_gg[[4]], ncol = 2)
plot_grid(lm_gg[[5]],
lm_gg[[6]], ncol = 2)
plot_grid(lm_gg[[7]],
lm_gg[[8]], ncol = 2)
plot_grid(lm_gg[[9]],
lm_gg[[10]], ncol = 2)
plot_grid(lm_gg[[11]],
lm_gg[[12]], ncol = 2)
plot_grid(lm_gg[[13]],
lm_gg[[14]], ncol = 2)
plot_grid(lm_gg[[15]],
lm_gg[[16]], ncol = 2)
plot_grid(lm_gg[[17]],
lm_gg[[18]], ncol = 2)
plot_grid(lm_gg[[19]],
lm_gg[[20]], ncol = 2)
plot_grid(lm_gg[[21]],
lm_gg[[22]], ncol = 2)
plot_grid(lm_gg[[23]],
lm_gg[[24]], ncol = 2)
plot_grid(lm_gg[[25]])
Notes
flow_gg <- list()
p <- length(lm_cols_x13)
for(j in 1:p){
m1 <- lm_adj[[j]]
gg <- ggplot(data = f2,
aes(x = median_size,
y = get(lm_cols_x13[j]),
color = stream_id)) +
geom_point() +
geom_ancova_grouped(m1, group_by = "treatment_tank") +
ylab(lm_cols_x13[j]) +
scale_color_manual(values = pal_okabe_ito_blue) +
NULL
flow_gg[[length(flow_gg)+1]] <- ggplotGrob(gg)
}
names(flow_gg) <- lm_cols_x13
plot_grid(flow_gg[[1]],
flow_gg[[2]], nrow = 2)
plot_grid(flow_gg[[3]],
flow_gg[[4]], nrow = 2)
plot_grid(flow_gg[[5]],
flow_gg[[6]], nrow = 2)
plot_grid(flow_gg[[7]],
flow_gg[[8]], nrow = 2)
plot_grid(flow_gg[[9]],
flow_gg[[10]], nrow = 2)
plot_grid(flow_gg[[11]],
flow_gg[[12]], nrow = 2)
plot_grid(flow_gg[[13]],
flow_gg[[14]], nrow = 2)
plot_grid(flow_gg[[15]],
flow_gg[[16]], nrow = 2)
plot_grid(flow_gg[[17]],
flow_gg[[18]], nrow = 2)
plot_grid(flow_gg[[19]],
flow_gg[[20]], nrow = 2)
plot_grid(flow_gg[[21]],
flow_gg[[22]], nrow = 2)
plot_grid(flow_gg[[23]],
flow_gg[[24]], nrow = 2)
plot_grid(flow_gg[[25]])
flow_levels <- c("No Flow", "Flow")
flow_f2 <- shape[treatment %in% flow_levels &
gen != "f0"]
flow_f2[, flow := factor(treatment,
levels = flow_levels)]
stream_id_levels <- c("ardc", "arin", "orop", "quar", "turu", "guan")
pred_state_levels <- c("High", "Med", "Low", "LowIntro" )
flow_f2[, stream_id := factor(stream_id, levels = stream_id_levels)]
flow_f2[, pred_state := factor(pred_state, levels = pred_state_levels)]
contrast_matrix <- diag(12)
stream_ids <- paste0(levels(flow_f2$stream_id), "_")
flow_levels <- c("noflow", "flow" )
contrast_labels <- do.call(paste0, expand.grid(stream_ids, flow_levels))
row.names(contrast_matrix) <- contrast_labels
flow_pairs <- list(
"ardc:flow - no flow" = contrast_matrix["ardc_flow",] -
contrast_matrix["ardc_noflow",],
"arin:flow - no flow" = contrast_matrix["arin_flow",] -
contrast_matrix["arin_noflow",],
"orop:flow - no flow" = contrast_matrix["orop_flow",] -
contrast_matrix["orop_noflow",],
"quar:flow - no flow" = contrast_matrix["quar_flow",] -
contrast_matrix["quar_noflow",],
"turu:flow - no flow" = contrast_matrix["turu_flow",] -
contrast_matrix["turu_noflow",],
"guan:flow - no flow" = contrast_matrix["guan_flow",] -
contrast_matrix["guan_noflow",]
)
flow_lm <- list()
flow_lm_emm <- list()
flow_lm_pairs <- list()
for(j in 1:length(lm_cols_x13)){
# model fit
form_j <- paste0(lm_cols_x13[j],
" ~ stream_id * flow") %>%
formula()
m1 <- lm(form_j, data = flow_f2)
# inference
m1_emm <- emmeans(m1,
specs = c("stream_id", "flow"))
m1_pairs <- contrast(m1_emm,
method = flow_pairs,
adjust = "none") %>%
summary(infer = TRUE)
# save to list
flow_lm[[length(flow_lm)+1]] <- m1
flow_lm_emm[[length(flow_lm_emm)+1]] <- m1_emm
flow_lm_pairs[[length(flow_lm_pairs)+1]] <- m1_pairs
}
names(flow_lm) <- lm_cols_x13
names(flow_lm_emm) <- lm_cols_x13
names(flow_lm_pairs) <- lm_cols_x13
flow_gg <- list()
for(j in 1:length(lm_cols_x13)){
m1 <- flow_lm[[j]]
m1_emm <- flow_lm_emm[[j]]
m1_pairs <- flow_lm_pairs[[j]]
gg <- ggplot_the_response(m1, m1_emm, m1_pairs,
y_label = lm_cols_x13[j])
flow_gg[[length(flow_gg)+1]] <- gg
}
names(flow_gg) <- lm_cols_x13
plot_grid(flow_gg[[1]],
flow_gg[[2]], ncol = 2)
plot_grid(flow_gg[[3]],
flow_gg[[4]], ncol = 2)
plot_grid(flow_gg[[5]],
flow_gg[[6]], ncol = 2)
plot_grid(flow_gg[[7]],
flow_gg[[8]], ncol = 2)
plot_grid(flow_gg[[9]],
flow_gg[[10]], ncol = 2)
plot_grid(flow_gg[[11]],
flow_gg[[12]], ncol = 2)
plot_grid(flow_gg[[13]],
flow_gg[[14]], ncol = 2)
plot_grid(flow_gg[[15]],
flow_gg[[16]], ncol = 2)
plot_grid(flow_gg[[17]],
flow_gg[[18]], ncol = 2)
plot_grid(flow_gg[[19]],
flow_gg[[20]], ncol = 2)
plot_grid(flow_gg[[21]],
flow_gg[[22]], ncol = 2)
plot_grid(flow_gg[[23]],
flow_gg[[24]], ncol = 2)
plot_grid(flow_gg[[25]])
Notes
flow_lm_adj <- list()
flow_lm_adj_emm <- list()
flow_lm_adj_pairs <- list()
for(j in 1:length(lm_cols_x13)){
# model fit
form_j <- paste0(lm_cols_x13[j],
" ~ stream_id * flow + median_size") %>%
formula()
m1 <- lm(form_j, data = flow_f2)
# inference
m1_emm <- emmeans(m1,
specs = c("stream_id", "flow"))
m1_pairs <- contrast(m1_emm,
method = flow_pairs,
adjust = "none") %>%
summary(infer = TRUE)
# save to list
flow_lm_adj[[length(flow_lm_adj)+1]] <- m1
flow_lm_adj_emm[[length(flow_lm_adj_emm)+1]] <- m1_emm
flow_lm_adj_pairs[[length(flow_lm_adj_pairs)+1]] <- m1_pairs
}
names(flow_lm_adj) <- lm_cols_x13
names(flow_lm_adj_emm) <- lm_cols_x13
names(flow_lm_adj_pairs) <- lm_cols_x13
flow_gg <- list()
for(j in 1:length(lm_cols_x13)){
m1 <- flow_lm_adj[[j]]
m1_emm <- flow_lm_adj_emm[[j]]
m1_pairs <- flow_lm_adj_pairs[[j]]
gg <- ggplot_the_response(m1, m1_emm, m1_pairs,
y_label = lm_cols_x13[j])
flow_gg[[length(flow_gg)+1]] <- gg
}
names(flow_gg) <- lm_cols_x13
plot_grid(flow_gg[[1]],
flow_gg[[2]], ncol = 2)
plot_grid(flow_gg[[3]],
flow_gg[[4]], ncol = 2)
plot_grid(flow_gg[[5]],
flow_gg[[6]], ncol = 2)
plot_grid(flow_gg[[7]],
flow_gg[[8]], ncol = 2)
plot_grid(flow_gg[[9]],
flow_gg[[10]], ncol = 2)
plot_grid(flow_gg[[11]],
flow_gg[[12]], ncol = 2)
plot_grid(flow_gg[[13]],
flow_gg[[14]], ncol = 2)
plot_grid(flow_gg[[15]],
flow_gg[[16]], ncol = 2)
plot_grid(flow_gg[[17]],
flow_gg[[18]], ncol = 2)
plot_grid(flow_gg[[19]],
flow_gg[[20]], ncol = 2)
plot_grid(flow_gg[[21]],
flow_gg[[22]], ncol = 2)
plot_grid(flow_gg[[23]],
flow_gg[[24]], ncol = 2)
plot_grid(flow_gg[[25]])
Notes
flow_gg <- list()
p <- length(lm_cols_x13)
for(j in 1:p){
m1 <- flow_lm_adj[[j]]
gg <- ggplot(data = flow_f2,
aes(x = median_size,
y = get(lm_cols_x13[j]),
color = stream_id)) +
geom_point() +
geom_ancova_grouped(m1, group_by = "flow") +
ylab(lm_cols_x13[j]) +
scale_color_manual(values = pal_okabe_ito_blue) +
NULL
flow_gg[[length(flow_gg)+1]] <- ggplotGrob(gg)
}
names(flow_gg) <- lm_cols_x13
plot_grid(flow_gg[[1]],
flow_gg[[2]], nrow = 2)
plot_grid(flow_gg[[3]],
flow_gg[[4]], nrow = 2)
plot_grid(flow_gg[[5]],
flow_gg[[6]], nrow = 2)
plot_grid(flow_gg[[7]],
flow_gg[[8]], nrow = 2)
plot_grid(flow_gg[[9]],
flow_gg[[10]], nrow = 2)
plot_grid(flow_gg[[11]],
flow_gg[[12]], nrow = 2)
plot_grid(flow_gg[[13]],
flow_gg[[14]], nrow = 2)
plot_grid(flow_gg[[15]],
flow_gg[[16]], nrow = 2)
plot_grid(flow_gg[[17]],
flow_gg[[18]], nrow = 2)
plot_grid(flow_gg[[19]],
flow_gg[[20]], nrow = 2)
plot_grid(flow_gg[[21]],
flow_gg[[22]], nrow = 2)
plot_grid(flow_gg[[23]],
flow_gg[[24]], nrow = 2)
plot_grid(flow_gg[[25]])
Notes
flow_group_means <- data.table()
for(j in 1:length(lm_cols_x13)){
m1_emm <- flow_lm_emm[[j]] %>%
summary()
flow_group_means <- rbind(
flow_group_means,
data.table(coord = lm_cols_x13[j],
m1_emm)
)
}
flow_group_means[, coord := factor(coord, levels = lm_cols_x13)]
flow_group_means[, axis := substr(coord, 1, 1)]
flow_group_means[, lm := substr(coord, 2, nchar(as.character(coord)))]
flow_group_means[, lm := factor(lm,
levels = as.character(1:13))]
# cast to wide with x and y cols for each landmark
flow_group_means_wide <- dcast(flow_group_means,
flow + stream_id + lm ~ axis,
value.var = "emmean")
flow_group_means_wide[lm == "13", y := 0]
flow_treatment_means_wide <- flow_group_means_wide[, .(
x = mean(x),
y = mean(y)),
by = .(flow, lm)]
# replace eye with lm1 in treatment means table
flow_treatment_means_wide[lm == "12",
x := flow_treatment_means_wide[lm == "1", x]]
flow_treatment_means_wide[lm == "12",
y := flow_treatment_means_wide[lm == "1", y]]
gg1 <- ggplot(data = flow_group_means_wide,
aes(x = x,
y = y,
color = flow)) +
geom_point(size = 1) +
geom_path(data = flow_treatment_means_wide[lm != "13"],
aes(color = flow)) +
coord_fixed() +
theme_pubr() +
scale_color_manual(values = pal_okabe_ito_blue) +
NULL
gg1
Figure 1. Model mean shape coordinates
fig_cap <- "Model mean shape coordinates"
if(is.null(getOption("knitr.in.progress"))){
cap <- fig_cap
}else{
cap <- capFig(fig_cap)
}
flow_grandmean <- flow_group_means_wide[, .(x = mean(x),
y = mean(y)),
by = .(lm, flow)]
tps_cols <- c("x", "y")
x <- flow_grandmean[flow == "Flow", .SD, .SDcols = tps_cols] %>%
as.matrix
y <- flow_grandmean[flow == "No Flow", .SD, .SDcols = tps_cols] %>%
as.matrix
tps(x, y,
gridlines = 50,
multiple = 2)
flow_group_means_adj <- data.table()
for(j in 1:length(lm_cols_x13)){
m1_emm <- flow_lm_adj_emm[[j]] %>%
summary()
flow_group_means_adj <- rbind(
flow_group_means_adj,
data.table(coord = lm_cols_x13[j],
m1_emm)
)
}
flow_group_means_adj[, coord := factor(coord, levels = lm_cols_x13)]
flow_group_means_adj[, axis := substr(coord, 1, 1)]
flow_group_means_adj[, lm := substr(coord, 2, nchar(as.character(coord)))]
flow_group_means_adj[, lm := factor(lm,
levels = as.character(1:13))]
# cast to wide with x and y cols for each landmark
flow_group_means_wide_adj <- dcast(flow_group_means_adj,
flow + stream_id + lm ~ axis,
value.var = "emmean")
flow_group_means_wide_adj[lm == "13", y := 0]
flow_treatment_means_wide_adj <- flow_group_means_wide_adj[, .(
x = mean(x),
y = mean(y)),
by = .(flow, lm)]
# replace eye with lm1 in treatment means table
flow_treatment_means_wide_adj[lm == "12",
x := flow_treatment_means_wide_adj[lm == "1", x]]
flow_treatment_means_wide_adj[lm == "12",
y := flow_treatment_means_wide_adj[lm == "1", y]]
gg2 <- ggplot(data = flow_group_means_wide_adj,
aes(x = x,
y = y,
color = flow)) +
geom_point(size = 1) +
geom_path(data = flow_treatment_means_wide_adj[lm != "13"],
aes(color = flow)) +
coord_fixed() +
theme_pubr() +
scale_color_manual(values = pal_okabe_ito_blue) +
NULL
gg2
Figure 2. Model mean shape coordinates
fig_cap <- "Model mean shape coordinates"
if(is.null(getOption("knitr.in.progress"))){
cap <- fig_cap
}else{
cap <- capFig(fig_cap)
}
gg <- plot_grid(gg1, gg2, nrow = 2)
gg
Figure 3. A. Model mean shape coordinates for No Flow and Flow treatments. B. Size-adjusted mean shape coordinates for No Flow and Flow treatments using median size as the covariate.
fig_cap <- "A. Model mean shape coordinates for No Flow and Flow treatments. B. Size-adjusted mean shape coordinates for No Flow and Flow treatments using median size as the covariate."
if(is.null(getOption("knitr.in.progress"))){
cap <- fig_cap
}else{
cap <- capFig(fig_cap)
}
# get treament means as means of group means
flow_treatment_means <- flow_group_means[, .(emmean = mean(emmean)),
by = .(coord, flow)]
flow_loadings <- dcast(flow_treatment_means,
coord ~ flow,
value.var = "emmean"
)
colnames(flow_loadings) <- c("lm", "flow", "no_flow")
# d is the raw difference
flow_loadings[, d := flow - no_flow]
# e is d scaled by length of d. This is like an eigenvector
flow_loadings[, e := d/sqrt(c(t(d) %*% d))]
# note - high flow fish have more negative scores so reverse direction
# so that high scores = high flow
flow_loadings[, e := -e]
e <- c(flow_loadings[, e])
t(e) %*% e # check to see if equal to one
## [,1]
## [1,] 1
# n x 2p matrix of coordinates
X <- flow_f2[, .SD, .SDcols = lm_cols_x13] %>%
as.matrix()
flow_f2[, flow_score := (X %*% e)[,1]]
flow_f2[, flow_score := flow_score - mean(flow_score)]
# n_stream_id x 2p matrix of coordinates + flow_score
flow_f2_group_means_wide <- flow_f2[, lapply(.SD, mean),
by = .(stream_id, treatment),
.SDcols = c(lm_cols_x13, "flow_score") ]
total_var <- sum(diag(cov(flow_f2[, .SD, .SDcols = lm_cols_x13])))
flow_var <- sd(flow_f2[, flow_score])^2
flow_variance_frac <- flow_var/total_var
flow_variance_frac
## [1] 0.3335456
ycols <- c("flow_score", lm_cols_x13)
r <- cor(flow_f2_group_means_wide[, .SD, .SDcols = ycols])[-1,1]
flow_loadings[, r := r]
flow_loadings %>%
kable (digits = c(1,4,4,4,2,2)) %>%
kable_styling(full_width = FALSE)
| lm | flow | no_flow | d | e | r |
|---|---|---|---|---|---|
| x1 | 0.0319 | -0.0193 | 0.0512 | -0.42 | -0.91 |
| y1 | -0.0216 | -0.0229 | 0.0014 | -0.01 | -0.08 |
| x2 | 0.5133 | 0.5655 | -0.0522 | 0.43 | 0.93 |
| y2 | 0.2032 | 0.1924 | 0.0108 | -0.09 | -0.32 |
| x3 | 1.4975 | 1.4831 | 0.0144 | -0.12 | -0.68 |
| y3 | 0.2773 | 0.2628 | 0.0146 | -0.12 | -0.45 |
| x4 | 1.6952 | 1.7680 | -0.0728 | 0.60 | 0.97 |
| y4 | 0.2224 | 0.2054 | 0.0170 | -0.14 | -0.63 |
| x5 | 2.3938 | 2.3823 | 0.0116 | -0.10 | -0.59 |
| y5 | 0.1789 | 0.1804 | -0.0015 | 0.01 | 0.24 |
| x6 | 2.4680 | 2.4733 | -0.0053 | 0.04 | -0.16 |
| y6 | -0.0126 | 0.0019 | -0.0144 | 0.12 | 0.83 |
| x7 | 2.3508 | 2.3522 | -0.0015 | 0.01 | -0.23 |
| y7 | -0.1993 | -0.1973 | -0.0020 | 0.02 | 0.18 |
| x8 | 1.6811 | 1.6947 | -0.0137 | 0.11 | 0.79 |
| y8 | -0.2379 | -0.2293 | -0.0085 | 0.07 | 0.26 |
| x9 | 1.4760 | 1.4610 | 0.0150 | -0.12 | -0.72 |
| y9 | -0.3130 | -0.3130 | 0.0000 | 0.00 | -0.17 |
| x10 | 1.1323 | 1.1466 | -0.0143 | 0.12 | 0.71 |
| y10 | -0.3753 | -0.3655 | -0.0098 | 0.08 | 0.30 |
| x11 | 0.4800 | 0.4813 | -0.0013 | 0.01 | 0.11 |
| y11 | -0.2771 | -0.2817 | 0.0046 | -0.04 | -0.58 |
| x12 | 0.2281 | 0.2256 | 0.0025 | -0.02 | -0.17 |
| y12 | -0.0259 | -0.0082 | -0.0177 | 0.15 | 0.80 |
| x13 | 3.1933 | 3.2333 | -0.0400 | 0.33 | 0.81 |
flow_score_lm1 <- lm(flow_score ~ stream_id * flow,
data = flow_f2)
ggcheck_the_model(flow_score_lm1) # increased variance in high flow groups
flow_score_lm1_emm <- emmeans(flow_score_lm1,
specs = c("stream_id", "flow"))
flow_score_lm1_pairs <- contrast(flow_score_lm1_emm,
method = flow_pairs,
adjust = "none"
)
# get treament means as means of group means
flow_treatment_means_adj <- flow_group_means_adj[, .(emmean = mean(emmean)),
by = .(coord, flow)]
flow_loadings_adj <- dcast(flow_treatment_means_adj,
coord ~ flow,
value.var = "emmean"
)
colnames(flow_loadings_adj) <- c("lm", "flow", "no_flow")
# d is the raw difference
flow_loadings_adj[, d := flow - no_flow]
# e is d scaled by length of d. This is like an eigenvector
flow_loadings_adj[, e := d/sqrt(c(t(d) %*% d))]
# note - high flow fish have more negative scores so reverse direction
# so that high scores = high flow
flow_loadings_adj[, e := -e]
e <- c(flow_loadings_adj[, e])
t(e) %*% e # check to see if equal to one
## [,1]
## [1,] 1
# n x 2p matrix of coordinates
X <- flow_f2[, .SD, .SDcols = lm_cols_x13] %>%
as.matrix()
flow_f2[, flow_score_adj := (X %*% e)[,1]]
flow_f2[, flow_score_adj := flow_score_adj - mean(flow_score_adj)]
# n_stream_id x 2p matrix of coordinates + flow_score
flow_f2_group_means_wide_adj <- flow_f2[, lapply(.SD, mean),
by = .(stream_id, treatment),
.SDcols = c(lm_cols_x13, "flow_score_adj") ]
total_var <- sum(diag(cov(flow_f2[, .SD, .SDcols = lm_cols_x13])))
flow_var <- sd(flow_f2[, flow_score_adj])^2
flow_variance_frac <- flow_var/total_var
flow_variance_frac
## [1] 0.2802826
ycols <- c("flow_score_adj", lm_cols_x13)
r <- cor(flow_f2_group_means_wide_adj[, .SD, .SDcols = ycols])[-1,1]
flow_loadings_adj[, r := r]
flow_loadings_adj %>%
kable (digits = c(1,4,4,4,2,2)) %>%
kable_styling(full_width = FALSE)
| lm | flow | no_flow | d | e | r |
|---|---|---|---|---|---|
| x1 | 0.0295 | -0.0144 | 0.0439 | -0.44 | -0.93 |
| y1 | -0.0219 | -0.0222 | 0.0003 | 0.00 | -0.09 |
| x2 | 0.5172 | 0.5578 | -0.0407 | 0.41 | 0.93 |
| y2 | 0.2037 | 0.1913 | 0.0124 | -0.12 | -0.33 |
| x3 | 1.4973 | 1.4837 | 0.0136 | -0.14 | -0.69 |
| y3 | 0.2758 | 0.2658 | 0.0100 | -0.10 | -0.46 |
| x4 | 1.6990 | 1.7604 | -0.0614 | 0.62 | 0.97 |
| y4 | 0.2208 | 0.2087 | 0.0121 | -0.12 | -0.64 |
| x5 | 2.3938 | 2.3823 | 0.0116 | -0.12 | -0.56 |
| y5 | 0.1799 | 0.1785 | 0.0014 | -0.01 | 0.20 |
| x6 | 2.4676 | 2.4739 | -0.0063 | 0.06 | -0.12 |
| y6 | -0.0109 | -0.0014 | -0.0095 | 0.10 | 0.82 |
| x7 | 2.3504 | 2.3529 | -0.0024 | 0.02 | -0.21 |
| y7 | -0.1982 | -0.1996 | 0.0014 | -0.01 | 0.16 |
| x8 | 1.6801 | 1.6967 | -0.0166 | 0.17 | 0.78 |
| y8 | -0.2374 | -0.2303 | -0.0071 | 0.07 | 0.30 |
| x9 | 1.4744 | 1.4643 | 0.0101 | -0.10 | -0.74 |
| y9 | -0.3126 | -0.3138 | 0.0012 | -0.01 | -0.14 |
| x10 | 1.1287 | 1.1538 | -0.0251 | 0.25 | 0.74 |
| y10 | -0.3756 | -0.3650 | -0.0106 | 0.11 | 0.32 |
| x11 | 0.4811 | 0.4790 | 0.0021 | -0.02 | 0.08 |
| y11 | -0.2782 | -0.2796 | 0.0014 | -0.01 | -0.57 |
| x12 | 0.2276 | 0.2268 | 0.0008 | -0.01 | -0.17 |
| y12 | -0.0262 | -0.0076 | -0.0186 | 0.19 | 0.80 |
| x13 | 3.2022 | 3.2155 | -0.0133 | 0.13 | 0.77 |
flow_score_adj_lm1 <- lm(flow_score_adj ~ stream_id * flow,
data = flow_f2)
ggcheck_the_model(flow_score_adj_lm1) # increased variance in high flow groups
flow_score_adj_lm1_emm <- emmeans(flow_score_adj_lm1,
specs = c("stream_id", "flow"))
flow_score_adj_lm1_pairs <- contrast(flow_score_adj_lm1_emm,
method = flow_pairs,
adjust = "none"
)
Plot flow score by stream_id:treatment combinations
gg1 <- ggplot_the_response(flow_score_lm1, flow_score_lm1_emm, flow_score_lm1_pairs,
y_label = "Flow Score")
gg2 <- ggplot_the_response(flow_score_adj_lm1,
flow_score_adj_lm1_emm,
flow_score_adj_lm1_pairs,
y_label = "Flow Score")
plot_grid(gg1, gg2, ncol = 2)
Figure 4. Flow scores
fig_cap <- "Flow scores"
if(is.null(getOption("knitr.in.progress"))){
cap <- fig_cap
}else{
cap <- capFig(fig_cap)
}
qplot(x = flow_loadings[, e], y = flow_loadings_adj[, e])
lm1 <- lm(median_size ~ flow * stream_id,
data = flow_f2)
lm2 <- lm(median_size ~ flow + stream_id,
data = flow_f2)
coef(summary(lm2)) %>%
kable(digits = c(3,3,2,5)) %>%
kable_styling()
| Estimate | Std. Error | t value | Pr(>|t|) | |
|---|---|---|---|---|
| (Intercept) | 0.986 | 0.015 | 66.39 | 0.00000 |
| flowFlow | -0.083 | 0.012 | -7.21 | 0.00000 |
| stream_idarin | -0.012 | 0.019 | -0.62 | 0.53498 |
| stream_idorop | 0.036 | 0.020 | 1.81 | 0.07172 |
| stream_idquar | 0.018 | 0.020 | 0.90 | 0.36701 |
| stream_idturu | 0.052 | 0.020 | 2.61 | 0.00989 |
| stream_idguan | -0.124 | 0.021 | -6.00 | 0.00000 |
lmm_4 <- lmer(flow_score ~ flow + median_size +
(1|stream_id) +
(1|stream_id:flow),
data = flow_f2)
lmm_4_emm <- emmeans(lmm_4, specs = "flow")
coef(summary(lmm_4)) %>%
kable(digits = c(3,3,1,1,8)) %>%
kable_styling()
| Estimate | Std. Error | df | t value | Pr(>|t|) | |
|---|---|---|---|---|---|
| (Intercept) | 0.243 | 0.039 | 128.3 | 6.2 | 1.000e-08 |
| flowFlow | 0.098 | 0.010 | 6.2 | 9.7 | 5.801e-05 |
| median_size | -0.294 | 0.039 | 156.0 | -7.6 | 0.000e+00 |
m1 <- lm(flow_score ~ flow*stream*median_size, data = flow_f2)
gg <- ggplot(data = flow_f2,
aes(x = median_size,
y = flow_score,
color = stream_id,
shape = flow)) +
geom_ancova_grouped(m1, group_by = "flow") +
geom_point(size = 2) +
scale_shape_manual(values=c(16, 1)) +
scale_color_manual(values = pal_okabe_ito_blue) +
theme_pubr() +
NULL
gg
m2 <- lm(flow_score ~ flow*stream + median_size, data = flow_f2)
gg <- ggplot(data = flow_f2,
aes(x = median_size,
y = flow_score,
color = stream_id,
shape = flow)) +
geom_ancova_grouped(m2, group_by = "flow") +
geom_point(size = 2) +
scale_shape_manual(values=c(16, 1)) +
scale_color_manual(values = pal_okabe_ito_blue) +
theme_pubr() +
NULL
gg
type3 <- list(flow = contr.sum,
stream = contr.sum)
m1 <- lm(flow_score ~ flow*stream*median_size,
contrasts = type3,
data = flow_f2)
Anova(m1, type = "3")
## Anova Table (Type III tests)
##
## Response: flow_score
## Sum Sq Df F value Pr(>F)
## (Intercept) 0.045903 1 42.9338 8.804e-10 ***
## flow 0.026912 1 25.1715 1.484e-06 ***
## stream 0.009143 5 1.7104 0.1356420
## median_size 0.039710 1 37.1410 9.083e-09 ***
## flow:stream 0.013337 5 2.4949 0.0334886 *
## flow:median_size 0.014654 1 13.7056 0.0003011 ***
## stream:median_size 0.009303 5 1.7403 0.1288680
## flow:stream:median_size 0.011935 5 2.2325 0.0540273 .
## Residuals 0.158236 148
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
m2 <- lm(flow_score ~ flow*stream + median_size,
contrasts = type3,
data = flow_f2)
Anova(m2, type = "3")
## Anova Table (Type III tests)
##
## Response: flow_score
## Sum Sq Df F value Pr(>F)
## (Intercept) 0.074455 1 58.1383 2.090e-12 ***
## flow 0.248894 1 194.3489 < 2.2e-16 ***
## stream 0.060878 5 9.5073 5.967e-08 ***
## median_size 0.067409 1 52.6367 1.665e-11 ***
## flow:stream 0.018294 5 2.8570 0.01688 *
## Residuals 0.203624 159
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
m3 <- lm(flow_score ~ flow + stream + median_size,
contrasts = type3,
data = flow_f2)
Anova(m3, type = "3")
## Anova Table (Type III tests)
##
## Response: flow_score
## Sum Sq Df F value Pr(>F)
## (Intercept) 0.090431 1 66.8294 7.658e-14 ***
## flow 0.272256 1 201.2002 < 2.2e-16 ***
## stream 0.051990 5 7.6843 1.628e-06 ***
## median_size 0.081475 1 60.2109 8.627e-13 ***
## Residuals 0.221918 164
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
flow_levels <- c("No Flow", "Flow")
flow_f0 <- shape[treatment %in% flow_levels] # includes f2
flow_f0[, flow := factor(treatment,
levels = flow_levels)]
stream_id_levels <- c("ardc", "arin", "orop", "quar", "turu", "guan")
pred_state_levels <- c("High", "Med", "Low", "LowIntro" )
flow_f0[, stream_id := factor(stream_id, levels = stream_id_levels)]
flow_f0[, pred_state := factor(pred_state, levels = pred_state_levels)]
# add f0 to flow treatment
flow_f0[gen == "f0", flow := "F0"]
flow_f0[, flow := factor(flow, levels = c("F0", "No Flow", "Flow"))]
# flow score
e <- c(flow_loadings[, e])
# n x 2p matrix of coordinates
X <- flow_f0[, .SD, .SDcols = lm_cols_x13] %>%
as.matrix()
flow_f0[, flow_score := (X %*% e)[,1]]
flow_f0[, flow_score := flow_score - mean(flow_score)]
# flow score adjusted for median size
e <- c(flow_loadings_adj[, e])
# n x 2p matrix of coordinates
X <- flow_f0[, .SD, .SDcols = lm_cols_x13] %>%
as.matrix()
flow_f0[, flow_score_adj := (X %*% e)[,1]]
flow_f0[, flow_score_adj := flow_score_adj - mean(flow_score_adj)]
# View(flow_f0[, .SD, .SDcols = c("dataset", "stream_id", "gen", "flow", "treatment")])
contrast_matrix <- diag(18)
stream_ids <- paste0(levels(flow_f2$stream_id), "_")
flow_levels <- c("f0", "noflow", "flow" )
contrast_labels <- do.call(paste0, expand.grid(stream_ids, flow_levels))
row.names(contrast_matrix) <- contrast_labels
flow_pairs <- list(
"ardc:flow - no flow" = contrast_matrix["ardc_flow",] -
contrast_matrix["ardc_noflow",],
"arin:flow - no flow" = contrast_matrix["arin_flow",] -
contrast_matrix["arin_noflow",],
"orop:flow - no flow" = contrast_matrix["orop_flow",] -
contrast_matrix["orop_noflow",],
"quar:flow - no flow" = contrast_matrix["quar_flow",] -
contrast_matrix["quar_noflow",],
"turu:flow - no flow" = contrast_matrix["turu_flow",] -
contrast_matrix["turu_noflow",],
"guan:flow - no flow" = contrast_matrix["guan_flow",] -
contrast_matrix["guan_noflow",]
)
flow_score_f0_lm1 <- lm(flow_score_adj ~ stream_id * flow,
data = flow_f0)
flow_score_f0_lm1_emm <- emmeans(flow_score_f0_lm1,
specs = c("stream_id", "flow"))
flow_score_f0_lm1_pairs <- contrast(flow_score_f0_lm1_emm,
method = flow_pairs,
adjust = "none"
)
gg3 <- ggplot_the_response(flow_score_f0_lm1, flow_score_f0_lm1_emm, flow_score_f0_lm1_pairs,
y_label = "Flow Score")
plot_grid(gg3)
Figure 5. Flow scores
fig_cap <- "Flow scores"
if(is.null(getOption("knitr.in.progress"))){
cap <- fig_cap
}else{
cap <- capFig(fig_cap)
}
type3 <- list(stream = contr.sum,
generation = contr.sum)
m1 <- lm(flow_score ~ flow*stream*median_size,
contrasts = type3,
data = flow_f0)
Anova(m1, type = "3")
## Anova Table (Type III tests)
##
## Response: flow_score
## Sum Sq Df F value Pr(>F)
## (Intercept) 0.011603 1 16.0516 8.085e-05 ***
## flow 0.037496 2 25.9355 5.540e-11 ***
## stream 0.001554 5 0.4299 0.82758
## median_size 0.016679 1 23.0733 2.657e-06 ***
## flow:stream 0.014424 10 1.9953 0.03423 *
## flow:median_size 0.019159 2 13.2520 3.339e-06 ***
## stream:median_size 0.000923 5 0.2553 0.93691
## flow:stream:median_size 0.013097 10 1.8118 0.05885 .
## Residuals 0.185057 256
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
m2 <- lm(flow_score ~ flow*stream + median_size,
contrasts = type3,
data = flow_f0)
Anova(m2, type = "3")
## Anova Table (Type III tests)
##
## Response: flow_score
## Sum Sq Df F value Pr(>F)
## (Intercept) 0.06829 1 77.8004 < 2.2e-16 ***
## flow 0.36604 2 208.5065 < 2.2e-16 ***
## stream 0.01813 5 4.1303 0.001241 **
## median_size 0.08222 1 93.6652 < 2.2e-16 ***
## flow:stream 0.03568 10 4.0646 3.185e-05 ***
## Residuals 0.23963 273
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
m3 <- lm(flow_score ~ flow + stream + median_size,
contrasts = type3,
data = flow_f0)
Anova(m3, type = "3")
## Anova Table (Type III tests)
##
## Response: flow_score
## Sum Sq Df F value Pr(>F)
## (Intercept) 0.09544 1 98.106 < 2.2e-16 ***
## flow 0.48481 2 249.183 < 2.2e-16 ***
## stream 0.04900 5 10.073 6.866e-09 ***
## median_size 0.12074 1 124.112 < 2.2e-16 ***
## Residuals 0.27530 283
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
pred_levels <- c("Flow", "Flow + pike")
pred_f2 <- shape[treatment %in% pred_levels]
pred_f2[, pred := factor(treatment,
levels = pred_levels)]
stream_id_levels <- c("ardc", "arin", "orop", "quar", "turu", "guan")
pred_state_levels <- c("High", "Med", "Low", "LowIntro" )
pred_f2[, stream_id := factor(stream_id, levels = stream_id_levels)]
pred_f2[, pred_state := factor(pred_state, levels = pred_state_levels)]
contrast_matrix <- diag(12)
stream_ids <- paste0(levels(pred_f2$stream_id), "_")
pred_levels <- c("nopred", "pred")
contrast_labels <- do.call(paste0, expand.grid(stream_ids, pred_levels))
row.names(contrast_matrix) <- contrast_labels
pred_pairs <- list(
"ardc:pred - no pred" = contrast_matrix["ardc_pred",] -
contrast_matrix["ardc_nopred",],
"arin:pred - no pred" = contrast_matrix["arin_pred",] -
contrast_matrix["arin_nopred",],
"orop:pred - no pred" = contrast_matrix["orop_pred",] -
contrast_matrix["orop_nopred",],
"quar:pred - no pred" = contrast_matrix["quar_pred",] -
contrast_matrix["quar_nopred",],
"turu:pred - no pred" = contrast_matrix["turu_pred",] -
contrast_matrix["turu_nopred",],
"guan:pred - no pred" = contrast_matrix["guan_pred",] -
contrast_matrix["guan_nopred",]
)
pred_lm <- list()
pred_lm_emm <- list()
pred_lm_pairs <- list()
for(j in 1:length(lm_cols_x13)){
# model fit
form_j <- paste0(lm_cols_x13[j],
" ~ stream_id * pred") %>%
formula()
m1 <- lm(form_j, data = pred_f2)
# inference
m1_emm <- emmeans(m1,
specs = c("stream_id", "pred"))
m1_pairs <- contrast(m1_emm,
method = pred_pairs,
adjust = "none") %>%
summary(infer = TRUE)
# save to list
pred_lm[[length(pred_lm)+1]] <- m1
pred_lm_emm[[length(pred_lm_emm)+1]] <- m1_emm
pred_lm_pairs[[length(pred_lm_pairs)+1]] <- m1_pairs
}
names(pred_lm) <- lm_cols_x13
names(pred_lm_emm) <- lm_cols_x13
names(pred_lm_pairs) <- lm_cols_x13
pred_gg <- list()
for(j in 1:length(lm_cols_x13)){
m1 <- pred_lm[[j]]
m1_emm <- pred_lm_emm[[j]]
m1_pairs <- pred_lm_pairs[[j]]
gg <- ggplot_the_response(m1, m1_emm, m1_pairs,
y_label = lm_cols_x13[j])
pred_gg[[length(pred_gg)+1]] <- gg
}
names(pred_gg) <- lm_cols_x13
plot_grid(pred_gg[[1]],
pred_gg[[2]], ncol = 2)
plot_grid(pred_gg[[3]],
pred_gg[[4]], ncol = 2)
plot_grid(pred_gg[[5]],
pred_gg[[6]], ncol = 2)
plot_grid(pred_gg[[7]],
pred_gg[[8]], ncol = 2)
plot_grid(pred_gg[[9]],
pred_gg[[10]], ncol = 2)
plot_grid(pred_gg[[11]],
pred_gg[[12]], ncol = 2)
plot_grid(pred_gg[[13]],
pred_gg[[14]], ncol = 2)
plot_grid(pred_gg[[15]],
pred_gg[[16]], ncol = 2)
plot_grid(pred_gg[[17]],
pred_gg[[18]], ncol = 2)
plot_grid(pred_gg[[19]],
pred_gg[[20]], ncol = 2)
plot_grid(pred_gg[[21]],
pred_gg[[22]], ncol = 2)
plot_grid(pred_gg[[23]],
pred_gg[[24]], ncol = 2)
plot_grid(pred_gg[[25]])
pred_lm_adj <- list()
pred_lm_adj_emm <- list()
pred_lm_adj_pairs <- list()
for(j in 1:length(lm_cols_x13)){
# model fit
form_j <- paste0(lm_cols_x13[j],
" ~ stream_id * pred + median_size") %>%
formula()
m1 <- lm(form_j, data = pred_f2)
# inference
m1_emm <- emmeans(m1,
specs = c("stream_id", "pred"))
m1_pairs <- contrast(m1_emm,
method = pred_pairs,
adjust = "none") %>%
summary(infer = TRUE)
# save to list
pred_lm_adj[[length(pred_lm_adj)+1]] <- m1
pred_lm_adj_emm[[length(pred_lm_adj_emm)+1]] <- m1_emm
pred_lm_adj_pairs[[length(pred_lm_adj_pairs)+1]] <- m1_pairs
}
names(pred_lm_adj) <- lm_cols_x13
names(pred_lm_adj_emm) <- lm_cols_x13
names(pred_lm_adj_pairs) <- lm_cols_x13
pred_gg <- list()
for(j in 1:length(lm_cols_x13)){
m1 <- pred_lm_adj[[j]]
m1_emm <- pred_lm_adj_emm[[j]]
m1_pairs <- pred_lm_adj_pairs[[j]]
gg <- ggplot_the_response(m1, m1_emm, m1_pairs,
y_label = lm_cols_x13[j])
pred_gg[[length(pred_gg)+1]] <- gg
}
names(pred_gg) <- lm_cols_x13
plot_grid(pred_gg[[1]],
pred_gg[[2]], ncol = 2)
plot_grid(pred_gg[[3]],
pred_gg[[4]], ncol = 2)
plot_grid(pred_gg[[5]],
pred_gg[[6]], ncol = 2)
plot_grid(pred_gg[[7]],
pred_gg[[8]], ncol = 2)
plot_grid(pred_gg[[9]],
pred_gg[[10]], ncol = 2)
plot_grid(pred_gg[[11]],
pred_gg[[12]], ncol = 2)
plot_grid(pred_gg[[13]],
pred_gg[[14]], ncol = 2)
plot_grid(pred_gg[[15]],
pred_gg[[16]], ncol = 2)
plot_grid(pred_gg[[17]],
pred_gg[[18]], ncol = 2)
plot_grid(pred_gg[[19]],
pred_gg[[20]], ncol = 2)
plot_grid(pred_gg[[21]],
pred_gg[[22]], ncol = 2)
plot_grid(pred_gg[[23]],
pred_gg[[24]], ncol = 2)
plot_grid(pred_gg[[25]])
pred_gg <- list()
for(j in 1:length(lm_cols_x13)){
m1 <- pred_lm_adj[[j]]
gg <- ggplot(data = pred_f2,
aes(x = median_size,
y = get(lm_cols_x13[j]),
color = stream_id)) +
geom_point() +
geom_ancova_grouped(m1, group_by = "pred") +
ylab(lm_cols_x13[j]) +
scale_color_manual(values = pal_okabe_ito_blue) +
NULL
pred_gg[[length(pred_gg)+1]] <- gg
}
names(pred_gg) <- lm_cols_x13
plot_grid(pred_gg[[1]],
pred_gg[[2]], ncol = 2)
plot_grid(pred_gg[[3]],
pred_gg[[4]], ncol = 2)
plot_grid(pred_gg[[5]],
pred_gg[[6]], ncol = 2)
plot_grid(pred_gg[[7]],
pred_gg[[8]], ncol = 2)
plot_grid(pred_gg[[9]],
pred_gg[[10]], ncol = 2)
plot_grid(pred_gg[[11]],
pred_gg[[12]], ncol = 2)
plot_grid(pred_gg[[13]],
pred_gg[[14]], ncol = 2)
plot_grid(pred_gg[[15]],
pred_gg[[16]], ncol = 2)
plot_grid(pred_gg[[17]],
pred_gg[[18]], ncol = 2)
plot_grid(pred_gg[[19]],
pred_gg[[20]], ncol = 2)
plot_grid(pred_gg[[21]],
pred_gg[[22]], ncol = 2)
plot_grid(pred_gg[[23]],
pred_gg[[24]], ncol = 2)
plot_grid(pred_gg[[25]])
pred_group_means <- data.table()
for(j in 1:length(lm_cols_x13)){
m1_emm <- pred_lm_emm[[j]] %>%
summary()
pred_group_means <- rbind(
pred_group_means,
data.table(coord = lm_cols_x13[j],
m1_emm)
)
}
pred_group_means[, coord := factor(coord, levels = lm_cols_x13)]
pred_group_means[, axis := substr(coord, 1, 1)]
pred_group_means[, lm := substr(coord, 2, nchar(as.character(coord)))]
pred_group_means[, lm := factor(lm,
levels = as.character(1:13))]
# cast to wide with x and y cols for each landmark
pred_group_means_wide <- dcast(pred_group_means,
pred + stream_id + lm ~ axis,
value.var = "emmean")
pred_group_means_wide[lm == "13", y := 0]
pred_treatment_means_wide <- pred_group_means_wide[, .(
x = mean(x),
y = mean(y)),
by = .(pred, lm)]
# replace eye with lm1 in treatment means table
pred_treatment_means_wide[lm == "12",
x := pred_treatment_means_wide[lm == "1", x]]
pred_treatment_means_wide[lm == "12",
y := pred_treatment_means_wide[lm == "1", y]]
gg1 <- ggplot(data = pred_group_means_wide,
aes(x = x,
y = y,
color = pred)) +
geom_point(size = 1) +
geom_path(data = pred_treatment_means_wide[lm != "13"],
aes(color = pred)) +
coord_fixed() +
theme_pubr() +
scale_color_manual(values = pal_okabe_ito_blue) +
NULL
gg1
Figure 6. Model mean shape coordinates
fig_cap <- "Model mean shape coordinates"
if(is.null(getOption("knitr.in.progress"))){
cap <- fig_cap
}else{
cap <- capFig(fig_cap)
}
pred_group_means_adj <- data.table()
for(j in 1:length(lm_cols_x13)){
m1_emm <- pred_lm_adj_emm[[j]] %>%
summary()
pred_group_means_adj <- rbind(
pred_group_means_adj,
data.table(coord = lm_cols_x13[j],
m1_emm)
)
}
pred_group_means_adj[, coord := factor(coord, levels = lm_cols_x13)]
pred_group_means_adj[, axis := substr(coord, 1, 1)]
pred_group_means_adj[, lm := substr(coord, 2, nchar(as.character(coord)))]
pred_group_means_adj[, lm := factor(lm,
levels = as.character(1:13))]
# cast to wide with x and y cols for each landmark
pred_group_means_wide_adj <- dcast(pred_group_means_adj,
pred + stream_id + lm ~ axis,
value.var = "emmean")
pred_group_means_wide_adj[lm == "13", y := 0]
pred_treatment_means_wide_adj <- pred_group_means_wide_adj[, .(
x = mean(x),
y = mean(y)),
by = .(pred, lm)]
# replace eye with lm1 in treatment means table
pred_treatment_means_wide_adj[lm == "12",
x := pred_treatment_means_wide_adj[lm == "1", x]]
pred_treatment_means_wide_adj[lm == "12",
y := pred_treatment_means_wide_adj[lm == "1", y]]
gg2 <- ggplot(data = pred_group_means_wide_adj,
aes(x = x,
y = y,
color = pred)) +
geom_point(size = 1) +
geom_path(data = pred_treatment_means_wide_adj[lm != "13"],
aes(color = pred)) +
coord_fixed() +
theme_pubr() +
scale_color_manual(values = pal_okabe_ito_blue) +
NULL
gg2
Figure 7. Model mean shape coordinates
fig_cap <- "Model mean shape coordinates"
if(is.null(getOption("knitr.in.progress"))){
cap <- fig_cap
}else{
cap <- capFig(fig_cap)
}
gg <- plot_grid(gg1, gg2, nrow = 2)
gg
Figure 8. A. Model mean shape coordinates for No Pred and Pred treatments. B. Size-adjusted mean shape coordinates for No Pred and Pred treatments using median size as the covariate.
fig_cap <- "A. Model mean shape coordinates for No Pred and Pred treatments. B. Size-adjusted mean shape coordinates for No Pred and Pred treatments using median size as the covariate."
if(is.null(getOption("knitr.in.progress"))){
cap <- fig_cap
}else{
cap <- capFig(fig_cap)
}
# get treament means as means of group means
pred_treatment_means <- pred_group_means[, .(emmean = mean(emmean)),
by = .(coord, pred)]
pred_loadings <- dcast(pred_treatment_means,
coord ~ pred,
value.var = "emmean"
)
colnames(pred_loadings) <- c("lm", "pred", "no_pred")
# d is the raw difference
pred_loadings[, d := pred - no_pred]
# e is d scaled by length of d. This is like an eigenvector
pred_loadings[, e := d/sqrt(c(t(d) %*% d))]
# note - high pred fish have more negative scores so reverse direction
# so that high scores = high pred
pred_loadings[, e := -e]
e <- c(pred_loadings[, e])
t(e) %*% e # check to see if equal to one
## [,1]
## [1,] 1
# n x 2p matrix of coordinates
X <- pred_f2[, .SD, .SDcols = lm_cols_x13] %>%
as.matrix()
pred_f2[, pred_score := (X %*% e)[,1]]
mean_pred_score <- mean(pred_f2[, pred_score]) # use for wild pred score
pred_f2[, pred_score := pred_score - mean(pred_score)]
# n_stream_id x 2p matrix of coordinates + pred_score
pred_f2_group_means_wide <- pred_f2[, lapply(.SD, mean),
by = .(stream_id, treatment),
.SDcols = c(lm_cols_x13, "pred_score") ]
total_var <- sum(diag(cov(pred_f2[, .SD, .SDcols = lm_cols_x13])))
pred_var <- sd(pred_f2[, pred_score])^2
pred_variance_frac <- pred_var/total_var
pred_variance_frac
## [1] 0.136145
ycols <- c("pred_score", lm_cols_x13)
r <- cor(pred_f2_group_means_wide[, .SD, .SDcols = ycols])[-1,1]
pred_loadings[, r := r]
pred_loadings %>%
kable (digits = c(1,4,4,4,2,2)) %>%
kable_styling(full_width = FALSE)
| lm | pred | no_pred | d | e | r |
|---|---|---|---|---|---|
| x1 | -0.0193 | 0.0026 | -0.0219 | 0.33 | 0.48 |
| y1 | -0.0229 | -0.0122 | -0.0107 | 0.16 | 0.78 |
| x2 | 0.5655 | 0.5770 | -0.0115 | 0.17 | 0.30 |
| y2 | 0.1924 | 0.1966 | -0.0042 | 0.06 | -0.11 |
| x3 | 1.4831 | 1.4788 | 0.0043 | -0.07 | -0.11 |
| y3 | 0.2628 | 0.2525 | 0.0103 | -0.15 | -0.56 |
| x4 | 1.7680 | 1.7459 | 0.0221 | -0.33 | -0.25 |
| y4 | 0.2054 | 0.2050 | 0.0004 | -0.01 | -0.22 |
| x5 | 2.3823 | 2.3922 | -0.0100 | 0.15 | 0.21 |
| y5 | 0.1804 | 0.1954 | -0.0150 | 0.22 | 0.68 |
| x6 | 2.4733 | 2.4654 | 0.0078 | -0.12 | -0.45 |
| y6 | 0.0019 | 0.0109 | -0.0090 | 0.14 | 0.62 |
| x7 | 2.3522 | 2.3703 | -0.0181 | 0.27 | 0.50 |
| y7 | -0.1973 | -0.1914 | -0.0059 | 0.09 | 0.63 |
| x8 | 1.6947 | 1.6911 | 0.0036 | -0.05 | -0.01 |
| y8 | -0.2293 | -0.2406 | 0.0113 | -0.17 | -0.60 |
| x9 | 1.4610 | 1.4590 | 0.0020 | -0.03 | -0.28 |
| y9 | -0.3130 | -0.3245 | 0.0115 | -0.17 | -0.71 |
| x10 | 1.1466 | 1.1464 | 0.0001 | 0.00 | -0.01 |
| y10 | -0.3655 | -0.3602 | -0.0053 | 0.08 | 0.28 |
| x11 | 0.4813 | 0.4800 | 0.0013 | -0.02 | -0.10 |
| y11 | -0.2817 | -0.2900 | 0.0083 | -0.12 | -0.41 |
| x12 | 0.2256 | 0.2205 | 0.0052 | -0.08 | -0.14 |
| y12 | -0.0082 | -0.0109 | 0.0028 | -0.04 | 0.07 |
| x13 | 3.2333 | 3.2763 | -0.0430 | 0.65 | 0.84 |
pred_score_lm1 <- lm(pred_score ~ stream_id * pred,
data = pred_f2)
ggcheck_the_model(pred_score_lm1) # increased variance in high pred groups
pred_score_lm1_emm <- emmeans(pred_score_lm1,
specs = c("stream_id", "pred"))
pred_score_lm1_pairs <- contrast(pred_score_lm1_emm,
method = pred_pairs,
adjust = "none"
)
# get treament means as means of group means
pred_treatment_means_adj <- pred_group_means_adj[, .(emmean = mean(emmean)),
by = .(coord, pred)]
pred_loadings_adj <- dcast(pred_treatment_means_adj,
coord ~ pred,
value.var = "emmean"
)
colnames(pred_loadings_adj) <- c("lm", "pred", "no_pred")
# d is the raw difference
pred_loadings_adj[, d := pred - no_pred]
# e is d scaled by length of d. This is like an eigenvector
pred_loadings_adj[, e := d/sqrt(c(t(d) %*% d))]
# note - high pred fish have more negative scores so reverse direction
# so that high scores = high pred
pred_loadings_adj[, e := -e]
e <- c(pred_loadings_adj[, e])
t(e) %*% e # check to see if equal to one
## [,1]
## [1,] 1
# n x 2p matrix of coordinates
X <- pred_f2[, .SD, .SDcols = lm_cols_x13] %>%
as.matrix()
pred_f2[, pred_score_adj := (X %*% e)[,1]]
pred_f2[, pred_score_adj := pred_score_adj - mean(pred_score_adj)]
# n_stream_id x 2p matrix of coordinates + pred_score
pred_f2_group_means_wide_adj <- pred_f2[, lapply(.SD, mean),
by = .(stream_id, treatment),
.SDcols = c(lm_cols_x13, "pred_score_adj") ]
total_var <- sum(diag(cov(pred_f2[, .SD, .SDcols = lm_cols_x13])))
pred_var <- sd(pred_f2[, pred_score_adj])^2
pred_variance_frac <- pred_var/total_var
pred_variance_frac
## [1] 0.1093738
ycols <- c("pred_score_adj", lm_cols_x13)
r <- cor(pred_f2_group_means_wide_adj[, .SD, .SDcols = ycols])[-1,1]
pred_loadings_adj[, r := r]
pred_loadings_adj %>%
kable (digits = c(1,4,4,4,2,2)) %>%
kable_styling(full_width = FALSE)
| lm | pred | no_pred | d | e | r |
|---|---|---|---|---|---|
| x1 | -0.0200 | 0.0061 | -0.0262 | 0.42 | 0.65 |
| y1 | -0.0235 | -0.0096 | -0.0139 | 0.22 | 0.66 |
| x2 | 0.5681 | 0.5648 | 0.0033 | -0.05 | 0.11 |
| y2 | 0.1927 | 0.1952 | -0.0024 | 0.04 | -0.19 |
| x3 | 1.4828 | 1.4806 | 0.0021 | -0.03 | -0.40 |
| y3 | 0.2622 | 0.2553 | 0.0069 | -0.11 | -0.45 |
| x4 | 1.7708 | 1.7326 | 0.0381 | -0.61 | -0.72 |
| y4 | 0.2048 | 0.2078 | -0.0030 | 0.05 | -0.03 |
| x5 | 2.3825 | 2.3913 | -0.0088 | 0.14 | 0.58 |
| y5 | 0.1807 | 0.1944 | -0.0137 | 0.22 | 0.56 |
| x6 | 2.4725 | 2.4689 | 0.0036 | -0.06 | 0.02 |
| y6 | 0.0024 | 0.0083 | -0.0058 | 0.09 | 0.48 |
| x7 | 2.3518 | 2.3726 | -0.0208 | 0.33 | 0.73 |
| y7 | -0.1970 | -0.1931 | -0.0039 | 0.06 | 0.51 |
| x8 | 1.6946 | 1.6918 | 0.0028 | -0.04 | -0.37 |
| y8 | -0.2289 | -0.2426 | 0.0137 | -0.22 | -0.47 |
| x9 | 1.4604 | 1.4617 | -0.0012 | 0.02 | -0.45 |
| y9 | -0.3129 | -0.3252 | 0.0123 | -0.20 | -0.55 |
| x10 | 1.1456 | 1.1512 | -0.0057 | 0.09 | -0.14 |
| y10 | -0.3655 | -0.3603 | -0.0052 | 0.08 | 0.35 |
| x11 | 0.4817 | 0.4781 | 0.0037 | -0.06 | -0.05 |
| y11 | -0.2822 | -0.2876 | 0.0053 | -0.09 | -0.15 |
| x12 | 0.2253 | 0.2220 | 0.0033 | -0.05 | -0.06 |
| y12 | -0.0081 | -0.0115 | 0.0034 | -0.05 | -0.13 |
| x13 | 3.2378 | 3.2546 | -0.0168 | 0.27 | 0.42 |
pred_score_adj_lm1 <- lm(pred_score_adj ~ stream_id * pred,
data = pred_f2)
ggcheck_the_model(pred_score_adj_lm1) # increased variance in high pred groups
pred_score_adj_lm1_emm <- emmeans(pred_score_adj_lm1,
specs = c("stream_id", "pred"))
pred_score_adj_lm1_pairs <- contrast(pred_score_adj_lm1_emm,
method = pred_pairs,
adjust = "none"
)
Plot pred score by stream_id:treatment combinations
gg1 <- ggplot_the_response(pred_score_lm1, pred_score_lm1_emm, pred_score_lm1_pairs,
y_label = "Pred Score")
gg2 <- ggplot_the_response(pred_score_adj_lm1,
pred_score_adj_lm1_emm,
pred_score_adj_lm1_pairs,
y_label = "Pred Score")
plot_grid(gg1, gg2, ncol = 2)
Figure 9. Pred scores
fig_cap <- "Pred scores"
if(is.null(getOption("knitr.in.progress"))){
cap <- fig_cap
}else{
cap <- capFig(fig_cap)
}
qplot(x = pred_loadings[, e], y = pred_loadings_adj[, e])
The outlier is
pred_loadings[e > 0.6]
## lm pred no_pred d e r
## 1: x13 3.233285 3.276304 -0.04301822 0.6451447 0.8353839
lm1 <- lm(median_size ~ pred * stream_id,
data = pred_f2)
lm2 <- lm(median_size ~ pred + stream_id,
data = pred_f2)
coef(summary(lm2)) %>%
kable(digits = c(3,3,2,5)) %>%
kable_styling()
| Estimate | Std. Error | t value | Pr(>|t|) | |
|---|---|---|---|---|
| (Intercept) | 0.910 | 0.020 | 44.83 | 0.00000 |
| predFlow + pike | -0.057 | 0.013 | -4.38 | 0.00003 |
| stream_idarin | -0.039 | 0.023 | -1.68 | 0.09466 |
| stream_idorop | 0.051 | 0.022 | 2.31 | 0.02285 |
| stream_idquar | 0.012 | 0.023 | 0.54 | 0.59125 |
| stream_idturu | 0.088 | 0.025 | 3.54 | 0.00057 |
| stream_idguan | -0.191 | 0.030 | -6.26 | 0.00000 |
lmm_4 <- lmer(pred_score ~ pred + median_size +
(1|stream_id) +
(1|stream_id:pred),
data = pred_f2)
lmm_4_emm <- emmeans(lmm_4, specs = "pred")
coef(summary(lmm_4)) %>%
kable(digits = c(3,3,1,1,8)) %>%
kable_styling()
| Estimate | Std. Error | df | t value | Pr(>|t|) | |
|---|---|---|---|---|---|
| (Intercept) | 0.154 | 0.040 | 72.1 | 3.8 | 0.00027858 |
| predFlow + pike | 0.051 | 0.007 | 122.8 | 7.1 | 0.00000000 |
| median_size | -0.206 | 0.043 | 85.8 | -4.7 | 0.00000869 |
m1 <- lm(pred_score ~ pred*stream*median_size, data = pred_f2)
gg <- ggplot(data = pred_f2,
aes(x = median_size,
y = pred_score,
color = stream_id,
shape = pred)) +
geom_ancova_grouped(m1, group_by = "pred") +
geom_point(size = 2) +
scale_shape_manual(values=c(16, 1)) +
scale_color_manual(values = pal_okabe_ito_blue) +
theme_pubr() +
NULL
gg
m2 <- lm(pred_score ~ pred*stream + median_size, data = pred_f2)
gg <- ggplot(data = pred_f2,
aes(x = median_size,
y = pred_score,
color = stream_id,
shape = pred)) +
geom_ancova_grouped(m2, group_by = "pred") +
geom_point(size = 2) +
scale_shape_manual(values=c(16, 1)) +
scale_color_manual(values = pal_okabe_ito_blue) +
theme_pubr() +
NULL
gg
type3 <- list(pred = contr.sum,
stream = contr.sum)
m1 <- lm(pred_score ~ pred*stream*median_size,
contrasts = type3,
data = pred_f2)
Anova(m1, type = "3")
## Anova Table (Type III tests)
##
## Response: pred_score
## Sum Sq Df F value Pr(>F)
## (Intercept) 0.000458 1 0.3078 0.5803
## pred 0.000008 1 0.0056 0.9407
## stream 0.003611 5 0.4858 0.7862
## median_size 0.000217 1 0.1457 0.7035
## pred:stream 0.004666 5 0.6278 0.6789
## pred:median_size 0.000028 1 0.0190 0.8907
## stream:median_size 0.003576 5 0.4810 0.7897
## pred:stream:median_size 0.004611 5 0.6202 0.6846
## Residuals 0.151643 102
m2 <- lm(pred_score ~ pred*stream + median_size,
contrasts = type3,
data = pred_f2)
Anova(m2, type = "3")
## Anova Table (Type III tests)
##
## Response: pred_score
## Sum Sq Df F value Pr(>F)
## (Intercept) 0.030148 1 21.3895 1.004e-05 ***
## pred 0.054230 1 38.4756 9.357e-09 ***
## stream 0.033070 5 4.6926 0.0006246 ***
## median_size 0.030432 1 21.5913 9.191e-06 ***
## pred:stream 0.003227 5 0.4580 0.8067318
## Residuals 0.159269 113
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
m3 <- lm(pred_score ~ pred + stream + median_size,
contrasts = type3,
data = pred_f2)
Anova(m3, type = "3")
## Anova Table (Type III tests)
##
## Response: pred_score
## Sum Sq Df F value Pr(>F)
## (Intercept) 0.030117 1 21.8698 7.827e-06 ***
## pred 0.061804 1 44.8800 7.530e-10 ***
## stream 0.040212 5 5.8401 7.412e-05 ***
## median_size 0.030591 1 22.2145 6.729e-06 ***
## Residuals 0.162496 118
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
pred_levels <- c("Flow", "Flow + pike") # get only greenhouse
pred_f0 <- shape[treatment %in% pred_levels | (dataset == "garden" & gen == "f0")]
# add f0 to flow treatment
pred_f0[, pred := treatment]
pred_f0[gen == "f0", pred := "F0"]
pred_f0[, pred := factor(pred, levels = c("F0", pred_levels))]
stream_id_levels <- c("ardc", "arin", "orop", "quar", "turu", "guan")
pred_state_levels <- c("High", "Med", "Low", "LowIntro" )
pred_f0[, stream_id := factor(stream_id, levels = stream_id_levels)]
pred_f0[, pred_state := factor(pred_state, levels = pred_state_levels)]
# pred score
e <- c(pred_loadings[, e])
# n x 2p matrix of coordinates
X <- pred_f0[, .SD, .SDcols = lm_cols_x13] %>%
as.matrix()
pred_f0[, pred_score := (X %*% e)[,1]]
pred_f0[, pred_score := pred_score - mean(pred_score)]
# flow score adjusted for median size
e <- c(pred_loadings_adj[, e])
# n x 2p matrix of coordinates
X <- pred_f0[, .SD, .SDcols = lm_cols_x13] %>%
as.matrix()
pred_f0[, pred_score_adj := (X %*% e)[,1]]
pred_f0[, pred_score_adj := pred_score_adj - mean(pred_score_adj)]
# View(pred_score_adj[, .SD, .SDcols = c("dataset", "stream_id", "gen", "flow", "treatment")])
contrast_matrix <- diag(18)
stream_ids <- paste0(levels(pred_f2$stream_id), "_")
pred_levels <- c("f0", "nopred", "pred" )
contrast_labels <- do.call(paste0, expand.grid(stream_ids, pred_levels))
row.names(contrast_matrix) <- contrast_labels
pred_pairs <- list(
"ardc:pred - no pred" = contrast_matrix["ardc_pred",] -
contrast_matrix["ardc_nopred",],
"arin:pred - no pred" = contrast_matrix["arin_pred",] -
contrast_matrix["arin_nopred",],
"orop:pred - no pred" = contrast_matrix["orop_pred",] -
contrast_matrix["orop_nopred",],
"quar:pred - no pred" = contrast_matrix["quar_pred",] -
contrast_matrix["quar_nopred",],
"turu:pred - no pred" = contrast_matrix["turu_pred",] -
contrast_matrix["turu_nopred",],
"guan:pred - no pred" = contrast_matrix["guan_pred",] -
contrast_matrix["guan_nopred",]
)
pred_score_f0_lm1 <- lm(pred_score_adj ~ stream_id * pred,
data = pred_f0)
pred_score_f0_lm1_emm <- emmeans(pred_score_f0_lm1,
specs = c("stream_id", "pred"))
pred_score_f0_lm1_pairs <- contrast(pred_score_f0_lm1_emm,
method = pred_pairs,
adjust = "none"
)
gg3 <- ggplot_the_response(pred_score_f0_lm1, pred_score_f0_lm1_emm, pred_score_f0_lm1_pairs,
y_label = "Pred Score")
plot_grid(gg3)
Figure 10. Pred scores
fig_cap <- "Pred scores"
if(is.null(getOption("knitr.in.progress"))){
cap <- fig_cap
}else{
cap <- capFig(fig_cap)
}
wild <- shape[dataset == "wild"]
score_by_site <- function(
fig_dat,
score, # the y variable
y_label = "score",
clickable = TRUE
){
fig_dat <- orderBy(~ pred_state + drainage, fig_dat)
fig_dat[, stream_id := factor(stream_id,
levels = unique(stream_id))]
site_n <- fig_dat[, .(N = .N),
by = .(stream_id, pred_state, drainage)]
site_sets <- site_n[, .(N = .N),
by = .(pred_state, drainage)]
site_sets[, pos_x := cumsum(N) + 0.5]
pd <- position_dodge(0.8)
gg <- ggplot(data = fig_dat,
aes(x = stream_id,
y = get(score),
color = pred_state,
shape = drainage)) +
scale_color_manual(values = pal_okabe_ito_blue) +
# geom_point(position = pd) +
geom_point_interactive(aes(tooltip = stream_id,
data_id = stream_id),
position = pd) +
geom_vline(xintercept = site_sets[, pos_x],
linetype = "dashed") +
geom_vline(xintercept = c(8.5, 16.5, 19.5),
size = 1.5) +
ylab(y_label) +
theme_pubr() +
theme(axis.text.x = element_blank()) +
NULL
# gg
# if(clickable == TRUE){
# gg <- gg + geom_point_interactive(aes(tooltip = stream_id,
# data_id = stream_id),
# position = pd)
# }else{
# gg <- gg + geom_point(position = pd)
# }
return(gg)
}
Y <- wild[, .SD, .SDcols = lm_cols_x13] %>%
as.matrix()
e <- flow_loadings_adj[1:25, e]
wild[, flow_score := (Y %*% e)[, 1]]
wild[, flow_score := flow_score - mean(flow_score)]
gg <- score_by_site(wild,
score = "flow_score",
y_label = "Flow score")
# gg
fig_cap <- "Flow Scores for wild samples. Positive is high flow shape."
if(is.null(getOption("knitr.in.progress"))){
cap <- fig_cap
gg_print <- gg
}else{
cap <- capFig(fig_cap)
gg_print <- girafe(ggobj = gg)
}
gg_print
Figure 11. Flow Scores for wild samples. Positive is high flow shape.
Y <- wild[, .SD, .SDcols = lm_cols_x13] %>%
as.matrix()
e <- pred_loadings_adj[1:25, e]
wild[, pred_score := (Y %*% e)[, 1]]
wild[, pred_score := pred_score - mean(pred_score)]
gg <- score_by_site(wild,
score = "pred_score",
y_label = "Predator score")
# gg
fig_cap <- "Predator Scores for wild samples. Positive is high predator shape."
if(is.null(getOption("knitr.in.progress"))){
cap <- fig_cap
gg_print <- gg
}else{
cap <- capFig(fig_cap)
gg_print <- girafe(ggobj = gg)
}
gg_print
Figure 12. Predator Scores for wild samples. Positive is high predator shape.
qplot(x = centroid_size, y = pred_score, data = wild)
qplot(x = centroid_size, y = flow_score, data = wild)